#
# measureError.R
#
# Estimate the true size of the present
# bias effect on turnout under various
# sizes of measurement error.
#
# Seth Hill, 2018
#

rm(list=ls())
if (.Platform$OS == "windows") {
  # Set working directory in location of script.
  .doit <- function() { # only works with R.exe; trap errors if using Rscript.exe
  frame_files <- lapply(sys.frames(), function(x) x$ofile);frame_files <- Filter(Negate(is.null), frame_files)
  PATH <- dirname(frame_files[[length(frame_files)]]); setwd(PATH) ; rm(PATH,frame_files)
  }
  try(.doit(),silent=T)
}
library(bit64)
library(data.table)
options(stringsAsFactors=F)
par(cex.lab=1.2,cex.axis=1.2)

# %%%%%%%%%%%%%%
# Functions to calculate eiv correction.
# %%%%%%%%%%%%%%

.eiv <- function(b.ols,p.true,p.survey,r0,r1) {
  # Savoca (2000) presents an estimator for
  # the true ols slope when a binary covariate
  # is measured with error. This fn makes that calc.
  # Arguments.
  #  b.ols - beta estimated from ols without correction.
  #  p.true - population proportion x*=1.
  #  p.survey - sample estimate proportion x=1.
  #  r0 - false positive rate p(x=1 when x*=0).
  #  r1 - false negative rate p(x=0 when x*=1).
  # Returns beta corrected for asymptotic measure
  # error.
  num <- b.ols * p.survey * (1-p.survey)
  denom <- p.true * (1-p.true) * (1 - r0 - r1)
  num/denom
}

.ptrue <- function(p.survey,r0,r1) {
  # Calculate true proportion given a survey estimated
  # proportion and given false positive and false
  # negative rates.
  # Arguments.
  #  p.survey - sample estimate proportion x=1.
  #  r0 - false positive rate p(x=1 when x*=0).
  #  r1 - false negative rate p(x=0 when x*=1).
  # Returns implied p.true given arguments.
  
  # p.survey = (1-r0-r1)*p.true + r0*(1-p.true) - r1*p.true
  p.true <- (p.survey - r0)/(1-2*r0-2*r1)
  return(p.true)
}
# Test.
#.ptrue(.22,0,0) # should be .22
#.ptrue(.22,.3,.3)
#.ptrue(.22,1,1)

.makePlot <- function(DT) {
  # Make the plot given input data.table as defined
  # below.
  DT[,plot(x=fn,y=beta.eiv,type='n',ann=F,axes=F,ylim=range(c(0,range(beta.eiv))),xlim=range(c(fn,1.1*max(fn))))]
  axis(1);axis(2,las=2)
  title(xlab="False negative rate",line=2,cex=1.2)
  title(ylab="Coefficient on present bias",cex=1.2)
  # Dashed line for actual estimate.
  DT[fn==0 & fp ==0,abline(h=beta.eiv,lty=2,lwd=2)]
  # Colors for lines.
  fps <- DT[,sort(unique(fp))]
  cols <- gray(seq(0.1,0.8,length.out=length(fps)))
  for (fpv in fps) {
    DT[fp == fpv,lines(x=fn,y=beta.eiv,lwd=3,col=cols[which(fpv == fps)])]
  }
  legend('topleft',legend=sprintf("%1.2f",fps),col=cols,lwd=2,lty=1,title="False positive rate",bty='n',cex=1.2)
  
}

pdf("measureError.pdf",width=6,height=5)
par(mar=c(3.1,4.1,0.6,1.1))

#
# %%%%%%%%%%%%%%%%%%%
# Full sample validated general coef
# (Table 2, column 1): -0.11.
# %%%%%%%%%%%%%%%%%%%
#
# Explore full range of false negatives but,
# on assumption that fn \geq fp, explore fewer fp.
.createFull <- function () {
  full <- CJ(fn=seq(0,0.4,.005),fp=seq(0,.15,.05))
  # Drop obs where fp > fn.
  full <- full[fn >= fp,]
  # Drop obs where abs(1-fp-fn) < .02 because it goes
  # into the denominator in .eiv().
  full <- full[abs(1-fp-fn) > .02,]
  full
}
full <- .createFull()
# Calculate implied p.true given fp and fn.
# Per analyzeTimeInconsistency.do, p.survey=0.213.
full[,p.true := .ptrue(p.survey=.213,r0=fp,r1=fn)]
# Drop cases where implied p.true outside [0,1].
full <- full[p.true >= 0 & p.true <= 1,]

# Calculate beta.eiv.
full[,beta.eiv := .eiv(b.ols=-0.11,p.true=p.true,p.survey=.213,r0=fp,r1=fn)]
# Drop undefined cases.
full <- full[beta.eiv != -Inf & p.true <= .8,]
# Max fn observed in each fp.
full[,max.fn := max(fn),by=fp]

# Text point: "if the false positive rate is 0.15 and 
# the false negative rate is 0.20"
full[fp == 0.15 & fn == 0.20,]

# Make plot.
.makePlot(full)
axis(2,las=2,at=-0.11)

dev.off()
